read AST data - from NCBI

ec<-read_delim("Ecoli_AST.tsv.gz") %>% rename(BioSample=`#BioSample`)

length(unique(ec$BioSample))
## [1] 9094
ec %>% group_by(BioSample) %>% summarise(n=length(unique((Antibiotic))))%>% pull(n) %>% summary()
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00   14.00   15.00   15.33   18.00   36.00

read in refgenes DB

refgenes <- read_delim("refgenes_20241003.tsv")

read Enterobase AMR calls - E. coli

ec_bin <-read_delim("ecoli_output-full_binaryAMR.csv.gz")

ec_gene_counts <- ec_bin %>% select(-uberstrain, -assembly_barcode) %>% colSums() %>% enframe() %>% rename(gene=name, count=value)

ec_gene_counts <- ec_gene_counts %>% left_join(refgenes, by=join_by("gene"=="#Allele")) %>% filter(`Whitelisted taxa`=="Escherichia" | is.na(`Whitelisted taxa`))

ec_gene_counts_alleleMatch <- ec_gene_counts %>% filter(!is.na(Class))
  
ec_gene_counts_noAlleleMatch <- ec_gene_counts %>% filter(is.na(Class)) %>% select(gene,count)
ec_gene_counts_geneFamilyMatch <- ec_gene_counts_noAlleleMatch %>% left_join(refgenes, by=join_by("gene"=="Gene family")) %>% 
  filter(`Whitelisted taxa`=="Escherichia" | is.na(`Whitelisted taxa`)) %>% group_by(gene) %>% filter(row_number()==1)

ec_gene_counts <- bind_rows(ec_gene_counts_alleleMatch, ec_gene_counts_geneFamilyMatch) %>% relocate(`#Allele`, .after=count)

read Enterobase full to get accession, pathotype, Clermont type for E. coli

ec_full <-read_delim("ecoli-output-full.csv.gz") %>% 
  select(uberstrain, assembly_barcode, sample_accession, secondary_sample_accession, country, amrfinder_db_version, amrfinder_version, Country, Pathovar, `Clermont Type (ClermonTyping)`, `Clermont Type (EzClermont)`)

ec_amr_meta <- left_join(ec_full, ec_bin, by=c("uberstrain", "assembly_barcode"))

get AMRfinderplus and AST data where both are available

sum(unique(ec$BioSample) %in% ec_amr_meta$sample_accession)
## [1] 6987
sum(unique(ec$BioSample) %in% ec_amr_meta$secondary_sample_accession)
## [1] 0
# AST data for strains with matching AMRfinder calls from Enterobase
ec_amrfinder_ast <- ec %>% filter(BioSample %in% ec_full$sample_accession)

# AMRfinder binary matrix from Enterobase, for strains with matching AST data from NCBI
amrfinder_full <- ec_amr_meta %>% filter(sample_accession %in% ec$BioSample)

distribution of common core-gene mutations, by Pathovar / Clermont Type

pmrB_Y358N <- summarise_marker_dist_ecoli(ec_amr_meta=ec_amr_meta, marker="pmrB_Y358N")
glpT_E448K <- summarise_marker_dist_ecoli(ec_amr_meta=ec_amr_meta, marker="glpT_E448K")

(glpT_E448K$clermont_plot + pmrB_Y358N$clermont_plot + plot_layout(axes="collect")) / 
(glpT_E448K$pathovar_plot + pmrB_Y358N$pathovar_plot + plot_layout(axes="collect"))

solo markers for ceftriaxone

cef_cephalosporin <- solo_marker(ast=ec_amrfinder_ast, antibiotic="ceftriaxone", refgene_subclass="CEPHALOSPORIN", gene_class_list=ec_gene_counts, amr_binary=amrfinder_full)

cef_cephalosporin$solo_stats
## # A tibble: 64 × 8
##    gene       total category      n      p     se ci.lower ci.upper
##    <chr>      <int> <chr>     <int>  <dbl>  <dbl>    <dbl>    <dbl>
##  1 ampC_C-11T     2 resistant     0 0      0       0          0    
##  2 ampC_C-42T    28 resistant     1 0.0357 0.0351  0          0.104
##  3 ampC_T-32A    50 resistant     4 0.08   0.0384  0.00480    0.155
##  4 blaCMY         6 resistant     3 0.5    0.204   0.0999     0.900
##  5 blaCMY-17      1 resistant     1 1      0       1          1    
##  6 blaCMY-2     138 resistant   136 0.986  0.0102  0.966      1    
##  7 blaCMY-4       4 resistant     3 0.75   0.217   0.326      1    
##  8 blaCMY-42      1 resistant     1 1      0       1          1    
##  9 blaCMY-6       1 resistant     1 1      0       1          1    
## 10 blaCTX-M       3 resistant     2 0.667  0.272   0.133      1    
## # ℹ 54 more rows
cef_cephalosporin$combined_plot

solo markers for fosfomycin

fos_fosfomycin <- solo_marker(ast=ec_amrfinder_ast, antibiotic="fosfomycin", refgene_subclass="FOSFOMYCIN", gene_class_list=ec_gene_counts, amr_binary=amrfinder_full)

fos_fosfomycin$solo_stats
## # A tibble: 4 × 8
##   gene       total category           n     p    se ci.lower ci.upper
##   <chr>      <int> <chr>          <int> <dbl> <dbl>    <dbl>    <dbl>
## 1 glpT_E448K    35 resistant          0     0     0        0        0
## 2 uhpT_E350Q     2 resistant          0     0     0        0        0
## 3 glpT_E448K    35 nonsusceptible     0     0     0        0        0
## 4 uhpT_E350Q     2 nonsusceptible     0     0     0        0        0
fos_fosfomycin$combined_plot

solo markers for colistin

colistin <- solo_marker(ast=ec_amrfinder_ast, antibiotic="colistin", refgene_subclass="COLISTIN", gene_class_list=ec_gene_counts, amr_binary=amrfinder_full)

colistin$solo_stats
## # A tibble: 6 × 8
##   gene       total category           n     p     se ci.lower ci.upper
##   <chr>      <int> <chr>          <int> <dbl>  <dbl>    <dbl>    <dbl>
## 1 mcr-1.1        1 resistant          1 1     0         1        1    
## 2 pmrB_E123D    42 resistant          0 0     0         0        0    
## 3 pmrB_Y358N     7 resistant          0 0     0         0        0    
## 4 mcr-1.1        1 nonsusceptible     1 1     0         1        1    
## 5 pmrB_E123D    42 nonsusceptible    40 0.952 0.0329    0.888    1    
## 6 pmrB_Y358N     7 nonsusceptible     4 0.571 0.187     0.205    0.938
colistin$combined_plot

solo markers for ceftriaxone

cef_cephalosporin <- solo_marker(ast=ec_amrfinder_ast, antibiotic="ceftriaxone", refgene_subclass="CEPHALOSPORIN", gene_class_list=ec_gene_counts, amr_binary=amrfinder_full)

cef_cephalosporin$solo_stats
## # A tibble: 64 × 8
##    gene       total category      n      p     se ci.lower ci.upper
##    <chr>      <int> <chr>     <int>  <dbl>  <dbl>    <dbl>    <dbl>
##  1 ampC_C-11T     2 resistant     0 0      0       0          0    
##  2 ampC_C-42T    28 resistant     1 0.0357 0.0351  0          0.104
##  3 ampC_T-32A    50 resistant     4 0.08   0.0384  0.00480    0.155
##  4 blaCMY         6 resistant     3 0.5    0.204   0.0999     0.900
##  5 blaCMY-17      1 resistant     1 1      0       1          1    
##  6 blaCMY-2     138 resistant   136 0.986  0.0102  0.966      1    
##  7 blaCMY-4       4 resistant     3 0.75   0.217   0.326      1    
##  8 blaCMY-42      1 resistant     1 1      0       1          1    
##  9 blaCMY-6       1 resistant     1 1      0       1          1    
## 10 blaCTX-M       3 resistant     2 0.667  0.272   0.133      1    
## # ℹ 54 more rows
cef_cephalosporin$combined_plot

solo markers for gentamicin vs aminoglycosides

gentamicin_agly <- solo_marker(ast=ec_amrfinder_ast, antibiotic="gentamicin", refgene_class="AMINOGLYCOSIDE", gene_class_list=ec_gene_counts, amr_binary=amrfinder_full)

gentamicin_agly$solo_stats
## # A tibble: 36 × 8
##    gene           total category      n        p       se ci.lower ci.upper
##    <chr>          <int> <chr>     <int>    <dbl>    <dbl>    <dbl>    <dbl>
##  1 16S_A523C          0 resistant     0 NaN      NaN           NaN NaN     
##  2 16S_C1192G         1 resistant     0   0        0             0   0     
##  3 aac(2')-IIa        1 resistant     0   0        0             0   0     
##  4 aac(3)-IId        41 resistant    41   1        0             1   1     
##  5 aac(3)-IIe         3 resistant     3   1        0             1   1     
##  6 aac(3)-VIa         2 resistant     0   0        0             0   0     
##  7 aac(6')-Ib         1 resistant     0   0        0             0   0     
##  8 aac(6')-Ib-cr5    19 resistant     0   0        0             0   0     
##  9 aac(6')-Ib4        6 resistant     0   0        0             0   0     
## 10 aadA1            145 resistant     3   0.0207   0.0118        0   0.0439
## # ℹ 26 more rows
gentamicin_agly$combined_plot

solo markers for amikacin vs aminoglycosides

amikacin_agly <- solo_marker(ast=ec_amrfinder_ast, antibiotic="amikacin", refgene_class="AMINOGLYCOSIDE", gene_class_list=ec_gene_counts, amr_binary=amrfinder_full)

amikacin_agly$solo_stats
## # A tibble: 32 × 8
##    gene           total category      n        p       se ci.lower ci.upper
##    <chr>          <int> <chr>     <int>    <dbl>    <dbl>    <dbl>    <dbl>
##  1 16S_A523C          0 resistant     0 NaN      NaN           NaN NaN     
##  2 aac(2')-IIa        1 resistant     0   0        0             0   0     
##  3 aac(3)-IId        27 resistant     0   0        0             0   0     
##  4 aac(3)-IIe         3 resistant     0   0        0             0   0     
##  5 aac(6')-Ib         1 resistant     1   1        0             1   1     
##  6 aac(6')-Ib-cr5    19 resistant     2   0.105    0.0704        0   0.243 
##  7 aac(6')-Ib4        5 resistant     0   0        0             0   0     
##  8 aadA1             87 resistant     1   0.0115   0.0114        0   0.0339
##  9 aadA2             23 resistant     0   0        0             0   0     
## 10 aadA22             3 resistant     0   0        0             0   0     
## # ℹ 22 more rows
amikacin_agly$combined_plot

solo markers for tobramycin vs aminoglycosides

tobramycin_agly <- solo_marker(ast=ec_amrfinder_ast, antibiotic="tobramycin", refgene_class="AMINOGLYCOSIDE", gene_class_list=ec_gene_counts, amr_binary=amrfinder_full)

tobramycin_agly$solo_stats
## # A tibble: 30 × 8
##    gene           total category      n      p     se ci.lower ci.upper
##    <chr>          <int> <chr>     <int>  <dbl>  <dbl>    <dbl>    <dbl>
##  1 aac(2')-IIa        1 resistant     0 0      0         0       0     
##  2 aac(3)-IId        27 resistant     9 0.333  0.0907    0.156   0.511 
##  3 aac(3)-IIe         3 resistant     0 0      0         0       0     
##  4 aac(6')-Ib         1 resistant     1 1      0         1       1     
##  5 aac(6')-Ib-cr5    18 resistant    15 0.833  0.0878    0.661   1     
##  6 aac(6')-Ib4        5 resistant     2 0.4    0.219     0       0.829 
##  7 aadA1             87 resistant     0 0      0         0       0     
##  8 aadA2             22 resistant     1 0.0455 0.0444    0       0.132 
##  9 aadA22             3 resistant     0 0      0         0       0     
## 10 aadA5             66 resistant     2 0.0303 0.0211    0       0.0717
## # ℹ 20 more rows
tobramycin_agly$combined_plot

# solo markers for ampicillin

ampicillin <- solo_marker(ast=ec_amrfinder_ast, antibiotic="ampicillin", refgene_class="BETA-LACTAM", gene_class_list=ec_gene_counts, amr_binary=amrfinder_full)

ampicillin$solo_stats
## # A tibble: 84 × 8
##    gene        total category      n       p        se ci.lower ci.upper
##    <chr>       <int> <chr>     <int>   <dbl>     <dbl>    <dbl>    <dbl>
##  1 ampC_C-11T      2 resistant     0   0       0          0            0
##  2 ampC_C-42T     20 resistant    20   1       0          1            1
##  3 ampC_G-15GG     0 resistant     0 NaN     NaN        NaN          NaN
##  4 ampC_T-32A     26 resistant    26   1       0          1            1
##  5 bla             0 resistant     0 NaN     NaN        NaN          NaN
##  6 blaCARB-2       5 resistant     5   1       0          1            1
##  7 blaCMY          4 resistant     3   0.75    0.217      0.326        1
##  8 blaCMY-17       1 resistant     1   1       0          1            1
##  9 blaCMY-2      101 resistant   100   0.990   0.00985    0.971        1
## 10 blaCMY-4        3 resistant     2   0.667   0.272      0.133        1
## # ℹ 74 more rows
ampicillin$combined_plot

# solo markers for azithromycin

azi <- solo_marker(ast=ec_amrfinder_ast, antibiotic="azithromycin", refgene_subclass="AZITHROMYCIN", gene_class_list=ec_gene_counts, amr_binary=amrfinder_full)

azi$solo_stats
## # A tibble: 4 × 8
##   gene   total category           n     p    se ci.lower ci.upper
##   <chr>  <int> <chr>          <int> <dbl> <dbl>    <dbl>    <dbl>
## 1 mef(B)     3 resistant          0     0     0        0        0
## 2 mph(A)     7 resistant          7     1     0        1        1
## 3 mef(B)     3 nonsusceptible     0     0     0        0        0
## 4 mph(A)     7 nonsusceptible     7     1     0        1        1
azi$combined_plot

solo markers for imipenem

imipenem <- solo_marker(ast=ec_amrfinder_ast, antibiotic="imipenem", refgene_subclass="CARBAPENEM", gene_class_list=ec_gene_counts, amr_binary=amrfinder_full)

imipenem$solo_stats
## # A tibble: 26 × 8
##    gene       total category      n     p    se ci.lower ci.upper
##    <chr>      <int> <chr>     <int> <dbl> <dbl>    <dbl>    <dbl>
##  1 blaKPC-2       2 resistant     2 1     0        1        1    
##  2 blaKPC-3      12 resistant     8 0.667 0.136    0.400    0.933
##  3 blaKPC-4       2 resistant     1 0.5   0.354    0        1    
##  4 blaNDM-1       8 resistant     8 1     0        1        1    
##  5 blaNDM-5       5 resistant     5 1     0        1        1    
##  6 blaNDM-6       1 resistant     1 1     0        1        1    
##  7 blaNDM-7       3 resistant     3 1     0        1        1    
##  8 blaNDM-9       1 resistant     1 1     0        1        1    
##  9 blaOXA         1 resistant     0 0     0        0        0    
## 10 blaOXA-181     1 resistant     0 0     0        0        0    
## # ℹ 16 more rows
imipenem$combined_plot

solo markers for meropenem

meropenem <- solo_marker(ast=ec_amrfinder_ast, antibiotic="meropenem", refgene_subclass="CARBAPENEM", gene_class_list=ec_gene_counts, amr_binary=amrfinder_full)

meropenem$solo_stats
## # A tibble: 26 × 8
##    gene       total category      n     p    se ci.lower ci.upper
##    <chr>      <int> <chr>     <int> <dbl> <dbl>    <dbl>    <dbl>
##  1 blaKPC-2       2 resistant     1   0.5 0.354    0        1    
##  2 blaKPC-3      12 resistant     6   0.5 0.144    0.217    0.783
##  3 blaKPC-4       2 resistant     0   0   0        0        0    
##  4 blaNDM-1      10 resistant    10   1   0        1        1    
##  5 blaNDM-5       9 resistant     9   1   0        1        1    
##  6 blaNDM-6       1 resistant     1   1   0        1        1    
##  7 blaNDM-7       3 resistant     3   1   0        1        1    
##  8 blaNDM-9       1 resistant     1   1   0        1        1    
##  9 blaOXA         1 resistant     0   0   0        0        0    
## 10 blaOXA-181     1 resistant     0   0   0        0        0    
## # ℹ 16 more rows
meropenem$combined_plot

solo markers for ceftazidime

ceftazidime_cephalosporin <- solo_marker(ast=ec_amrfinder_ast, antibiotic="ceftazidime", refgene_subclass="CEPHALOSPORIN", gene_class_list=ec_gene_counts, amr_binary=amrfinder_full)

ceftazidime_cephalosporin$solo_stats
## # A tibble: 70 × 8
##    gene         total category      n      p     se ci.lower ci.upper
##    <chr>        <int> <chr>     <int>  <dbl>  <dbl>    <dbl>    <dbl>
##  1 ampC_C-11T       5 resistant     0 0      0        0        0     
##  2 ampC_C-42T      28 resistant     1 0.0357 0.0351   0        0.104 
##  3 ampC_G-15GG      4 resistant     0 0      0        0        0     
##  4 ampC_T-14TGT     2 resistant     0 0      0        0        0     
##  5 ampC_T-32A      66 resistant     3 0.0455 0.0256   0        0.0957
##  6 blaCMY           4 resistant     2 0.5    0.25     0.0100   0.99  
##  7 blaCMY-2       132 resistant   116 0.879  0.0284   0.823    0.934 
##  8 blaCMY-4         4 resistant     3 0.75   0.217    0.326    1     
##  9 blaCMY-42        3 resistant     3 1      0        1        1     
## 10 blaCMY-59        1 resistant     1 1      0        1        1     
## # ℹ 60 more rows
ceftazidime_cephalosporin$combined_plot

solo markers for chloramphenicol

chloramphenicol <- solo_marker(ast=ec_amrfinder_ast, antibiotic="chloramphenicol", refgene_class="PHENICOL", gene_class_list=ec_gene_counts, amr_binary=amrfinder_full)

chloramphenicol$solo_stats
## # A tibble: 12 × 8
##    gene  total category           n       p       se ci.lower ci.upper
##    <chr> <int> <chr>          <int>   <dbl>    <dbl>    <dbl>    <dbl>
##  1 catA1    17 resistant         17   1       0         1        1    
##  2 catA2     1 resistant          1   1       0         1        1    
##  3 catB3     0 resistant          0 NaN     NaN       NaN      NaN    
##  4 cmlA1    20 resistant         16   0.8     0.0894    0.625    0.975
##  5 cmlA5     0 resistant          0 NaN     NaN       NaN      NaN    
##  6 floR     83 resistant         82   0.988   0.0120    0.964    1    
##  7 catA1    17 nonsusceptible    17   1       0         1        1    
##  8 catA2     1 nonsusceptible     1   1       0         1        1    
##  9 catB3     0 nonsusceptible     0 NaN     NaN       NaN      NaN    
## 10 cmlA1    20 nonsusceptible    17   0.85    0.0798    0.694    1    
## 11 cmlA5     0 nonsusceptible     0 NaN     NaN       NaN      NaN    
## 12 floR     83 nonsusceptible    82   0.988   0.0120    0.964    1
chloramphenicol$combined_plot

solo markers for trimethoprim

trimethoprim <- solo_marker(ast=ec_amrfinder_ast, antibiotic="trimethoprim-sulfamethoxazole", refgene_subclass="TRIMETHOPRIM", gene_class_list=ec_gene_counts, amr_binary=amrfinder_full)

trimethoprim$solo_stats
## # A tibble: 32 × 8
##    gene   total category      n       p       se ci.lower ci.upper
##    <chr>  <int> <chr>     <int>   <dbl>    <dbl>    <dbl>    <dbl>
##  1 dfrA1    141 resistant   111   0.787   0.0345    0.720    0.855
##  2 dfrA12   113 resistant   102   0.903   0.0279    0.848    0.957
##  3 dfrA14    69 resistant    63   0.913   0.0339    0.847    0.980
##  4 dfrA15     0 resistant     0 NaN     NaN       NaN      NaN    
##  5 dfrA16     4 resistant     1   0.25    0.217     0        0.674
##  6 dfrA17   475 resistant   449   0.945   0.0104    0.925    0.966
##  7 dfrA19     0 resistant     0 NaN     NaN       NaN      NaN    
##  8 dfrA25     2 resistant     2   1       0         1        1    
##  9 dfrA27     1 resistant     1   1       0         1        1    
## 10 dfrA29     3 resistant     3   1       0         1        1    
## # ℹ 22 more rows
trimethoprim$combined_plot

solo markers for tetracycline

tet <- solo_marker(ast=ec_amrfinder_ast, antibiotic="tetracycline", refgene_class="TETRACYCLINE", gene_class_list=ec_gene_counts, amr_binary=amrfinder_full)

tet$solo_stats
## # A tibble: 14 × 8
##    gene       total category           n     p      se ci.lower ci.upper
##    <chr>      <int> <chr>          <int> <dbl>   <dbl>    <dbl>    <dbl>
##  1 acrB_R620C     3 resistant          0 0     0          0        0    
##  2 tet(A)      1036 resistant       1029 0.993 0.00255    0.988    0.998
##  3 tet(B)       835 resistant        824 0.987 0.00395    0.979    0.995
##  4 tet(C)        48 resistant         16 0.333 0.0680     0.200    0.467
##  5 tet(D)        28 resistant         27 0.964 0.0351     0.896    1    
##  6 tet(H)         2 resistant          2 1     0          1        1    
##  7 tet(M)         6 resistant          0 0     0          0        0    
##  8 acrB_R620C     3 nonsusceptible     0 0     0          0        0    
##  9 tet(A)      1036 nonsusceptible  1029 0.993 0.00255    0.988    0.998
## 10 tet(B)       835 nonsusceptible   825 0.988 0.00376    0.981    0.995
## 11 tet(C)        48 nonsusceptible    44 0.917 0.0399     0.838    0.995
## 12 tet(D)        28 nonsusceptible    27 0.964 0.0351     0.896    1    
## 13 tet(H)         2 nonsusceptible     2 1     0          1        1    
## 14 tet(M)         6 nonsusceptible     0 0     0          0        0
tet$combined_plot

solo markers for tigecycline

tig <- solo_marker(ast=ec_amrfinder_ast, antibiotic="tigecycline", refgene_subclass="TIGECYCLINE", gene_class_list=ec_gene_counts, amr_binary=amrfinder_full)

tig$solo_stats
## # A tibble: 0 × 8
## # ℹ 8 variables: gene <chr>, total <int>, category <chr>, n <int>, p <dbl>,
## #   se <dbl>, ci.lower <dbl>, ci.upper <dbl>

solo markers for ciprofloxacin

cip <- solo_marker(ast=ec_amrfinder_ast, antibiotic="ciprofloxacin", refgene_subclass="QUINOLONE", gene_class_list=ec_gene_counts, amr_binary=amrfinder_full)

cip$solo_stats
## # A tibble: 46 × 8
##    gene           total category      n       p      se ci.lower ci.upper
##    <chr>          <int> <chr>     <int>   <dbl>   <dbl>    <dbl>    <dbl>
##  1 aac(6')-Ib-cr5     1 resistant     0 0       0       0          0     
##  2 gyrA_D87G          4 resistant     0 0       0       0          0     
##  3 gyrA_D87N          6 resistant     0 0       0       0          0     
##  4 gyrA_D87Y          3 resistant     1 0.333   0.272   0          0.867 
##  5 gyrA_S83A         12 resistant     0 0       0       0          0     
##  6 gyrA_S83L        122 resistant    18 0.148   0.0321  0.0846     0.210 
##  7 gyrA_S83V          1 resistant     0 0       0       0          0     
##  8 marR_S3N         536 resistant     4 0.00746 0.00372 0.000177   0.0147
##  9 parC_A56T          7 resistant     0 0       0       0          0     
## 10 parC_S57T         41 resistant     0 0       0       0          0     
## # ℹ 36 more rows
cip$combined_plot